import scrapy


class DangdangSpider(scrapy.Spider):
    """Spider that extracts book listings from dangdang.com.

    Crawling starts from ``http://book.dangdang.com/``. Every page is
    handled by :meth:`parse`, which emits one dict per listing entry and
    then follows the page's "next" link, if it has one.
    """

    name = 'dangdang'
    allowed_domains = ['dangdang.com']
    start_urls = ['http://book.dangdang.com/']

    def parse(self, response):
        """Yield one item per listed book, then a request for the next page.

        Args:
            response: The downloaded page to extract from.

        Yields:
            A dict with the keys ``title``, ``price``, ``author``,
            ``publisher`` and ``category`` for each ``<li>`` under
            ``ul.bigimg``. A value is ``None`` when its XPath matches
            nothing. After the items, a follow-up request for the "next"
            link is yielded when that link exists; it is handled by this
            same method.
        """
        # Output field name -> XPath evaluated relative to one listing <li>.
        # Only the first match of each query is kept.
        field_queries = {
            'title': './/a[@class="pic"]/img/@alt',
            'price': './/p[@class="price"]/span[1]/text()',
            'author': './/p[@class="author"]/span/a/text()',
            'publisher': './/p[@class="publisher"]/a/text()',
            'category': './/p[@class="search_book_category"]/a/text()',
        }

        listing_nodes = response.xpath('//ul[@class="bigimg"]/li')
        for node in listing_nodes:
            yield {
                field: node.xpath(query).get()
                for field, query in field_queries.items()
            }

        # Pagination: stop when the page has no "next" link.
        next_href = response.xpath('//a[@class="next"]/@href').get()
        if not next_href:
            return
        yield response.follow(next_href, callback=self.parse)
